#delimit;
** REPLACE FILE PATH WITH PATH TO RELEVANT REPLICATION FILES;
local fileloc = "~/KMS_REPLICATION";
set logtype text;
capture log close templog;

log using `fileloc'/log_files/daily_to_weekly_weather.txt, name(templog) replace;
set more off;
pause on;
clear all;

** First, only keep those close enough to be needed;
use `fileloc'/data/location_data/weather_to_zip_distance.dta, clear;
keep if distance <= 20;
keep usaf wban;
sample 1, by(usaf wban) count;
destring usaf, replace force;
destring wban, replace force;
sort usaf wban;
tempfile distance;
save `distance';

use `fileloc'/data/weather_data/daily_weather.dta, clear;
rename stn usaf;
sort usaf wban;
joinby usaf wban using `distance';

gen date = mdy(month, day, year);
format date %td;

rename wdsp windspeed;
rename dewp dewpoint;
rename slp sea_pressure;
rename stp station_pressure;
rename max max_temp;
rename rain rainy_day;
rename prcp rain;
rename fog foggy_day;

**Change missing from 9's to ".";
replace max_temp = . if max_temp >= float(9999);
replace rain = . if rain >= float(99.99);
replace windspeed = . if windspeed == float(999.9);
replace dewpoint = . if dewpoint >= float(9999);
replace sea_pressure = . if sea_pressure >= float(9999);
replace station_pressure = . if station_pressure >= float(9999);

keep usaf wban month date dewpoint sea_pressure station_pressure windspeed max_temp rain foggy_day rainy_day temp;

sort usaf wban;

tempfile weather;
save `weather';

** Adding elevation information, used to calculate specific humidity;
use `fileloc'/data/location_data/weather_locations.dta, clear;
keep elevation usaf wban;
destring usaf, replace force;
destring wban, replace force;
sort usaf wban;
joinby usaf wban using `weather';

** Generate specific humidity - uses elevation, sea level pressure, and dewpoint;
**get elevation in meters - it's in 1/10 meters ;
replace elevation=elevation/10 ;
** replace missing sea level pressure with average for state;
egen sea_level_avg = mean(sea_pressure), by(date);
replace sea_pressure = sea_level_avg if sea_pressure == .;
drop sea_level_avg;

**convert sea level pressure to station pressure (slpM is sea level pressure in inches mercury) ;
gen slpM=sea_pressure/33.8639 ;
replace station_pressure = 33.8639*slpM*((288 - .0065*elevation)/288)^5.2561 if station_pressure == .;

**convert to Celsius (Specific humidity formula that NOAA has uses Celsius);
foreach var in temp dewpoint { ;
	gen `var'C = (5/9)*(`var'-32) ;
} ;

**calculate MEAN humidity ;
gen E=6.11*10^(7.5*dewpointC/(237.7+dewpointC)) ;
gen Es=6.11*10^(7.5*tempC/(237.7+tempC)) ;

gen humidR=E/Es*100 ;

**calculate SPECIFIC humidity water (g)/ air (kg) ;
gen humidS=1000*.622*E/(station_pressure-.378*E) ;

label var humidR "relative humidity (%)" ;
label var humidS "specific humidity (g/kg)" ;
label var E "actual vapor pressure" ;
label var Es "saturated vapor pressure" ;

***************************************************************************
************** CONVERTING DAILY VALUES INTO WEEKLY VALUES *****************
**************************************************************************;

gen week = wofd(date);
gen year = year(date);
		
duplicates tag usaf wban date, g(duplicates);
tab duplicates;
** Drop only 2 observed duplicate observations;
drop if duplicates == 1;
drop duplicates;

collapse (mean) rain humidS windspeed max_temp (sum) days_with_fog = foggy_day days_with_rain = rainy_day, by(usaf wban week) fast;

foreach var in rain humidS max_temp windspeed days_with_rain days_with_fog {;
	drop if `var' == .;
};

gen year = year(dofw(week));

save `fileloc'/data/weather_data/weekly_weather.dta, replace;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**XXXXXXXXXXXXXXXXXXXXXX MAKING BALANCED PANELS XXXXXXXXXXXXXXXXXXXXXXXXXXX
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

** Generate balanced panel for relevant times;
use `fileloc'/data/weather_data/weekly_weather.dta, clear;

** CREATE BALANCED PANEL ;
keep if year >= 2002 & year <= 2007;
bysort usaf wban : egen minyear = min(year);
bysort usaf wban : egen maxyear = max(year);
keep if minyear <= 2002 & maxyear == 2007;

** Restrict to those present in all years;
foreach temp_factor in max_temp rain humidS windspeed days_with_rain days_with_fog {;
	forvalues year = 2002/2007 {;
		gen `temp_factor'_`year' = (`temp_factor' ~= . & year == `year');
		egen `temp_factor'_`year'_around = max(`temp_factor'_`year'), by(usaf wban);		
		drop `temp_factor'_`year';		
	};
	replace `temp_factor' = . if `temp_factor'_2002_around == 0 | `temp_factor'_2003_around == 0 | `temp_factor'_2004_around == 0 | `temp_factor'_2005_around == 0 | `temp_factor'_2006_around == 0 | `temp_factor'_2007_around == 0 ;
	
	drop *_around;

};

keep if max_temp ~= . | rain ~= . | humidS ~= . | windspeed ~= . | days_with_rain ~= . | days_with_fog ~= .;

** How many sensors?;
gsort usaf wban, g(howmany);
sum howmany;
drop howmany;

keep week usaf wban rain humidS windspeed max_temp days_with_fog days_with_rain;

save `fileloc'/data/weather_data/weekly_weather_KMS.dta, replace;

log close templog;
